# Libraries: SEM/CFA (lavaan, semPlot), multilevel models (lme4),
# marginal effects and model tables (marginaleffects, modelsummary,
# tinytable, kableExtra), plotting (ggplot2, egg), data wrangling
# (dplyr, reshape2), and Python interop for knitr (reticulate).
library(lavaan)
library(dplyr)
library(reticulate)
library(marginaleffects)
library(modelsummary)
library(ggplot2)
library(egg)
library(lme4)
library(semPlot)
library(tinytable)
library(kableExtra)
library(reshape2)
# Point reticulate at the local conda/mamba environment directory.
options(rstudio.python.installationPath = "/Users/nathanielforde/mambaforge/envs")
# Render modelsummary tables through tinytable by default.
options("modelsummary_factory_default" = "tinytable")
# Default figure dimensions for notebook rendering.
options(repr.plot.width=15, repr.plot.height=8)
# Register the python knitr engine so python chunks run via reticulate.
knitr::knit_engines$set(python = reticulate::eng_python)
# Disable scientific notation in printed numbers.
options(scipen=999)
# Fix the RNG seed so the simulated noise injected below is reproducible.
set.seed(130)
Warning
This is a work in progress. We intend to elaborate the choices related to analysing and understanding the data elicited through intentional psychometric surveys.
Measurement and Measurement Constructs
# Load the simulated survey data.
df <- read.csv('sem_data.csv')

# Deliberately contaminate the third life-satisfaction item for the
# "west" region with Gaussian noise (mean 0.5, sd 1) so that region
# differences show up in the downstream models.
inflate <- df$region == 'west'
noise <- rnorm(sum(inflate), 0.5, 1)
df$ls_p3[inflate] <- df$ls_p3[inflate] + noise

# Composite outcome scores: simple sum and per-row mean of the three
# life-satisfaction items.
ls_items <- c('ls_p1', 'ls_p2', 'ls_p3')
df$ls_sum <- df$ls_p1 + df$ls_p2 + df$ls_p3
df$ls_mean <- rowMeans(df[ls_items])
head(df) |> kable()| ID | region | gender | age | se_acad_p1 | se_acad_p2 | se_acad_p3 | se_social_p1 | se_social_p2 | se_social_p3 | sup_friends_p1 | sup_friends_p2 | sup_friends_p3 | sup_parents_p1 | sup_parents_p2 | sup_parents_p3 | ls_p1 | ls_p2 | ls_p3 | ls_sum | ls_mean |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | west | female | 13 | 4.857143 | 5.571429 | 4.500000 | 5.80 | 5.500000 | 5.40 | 6.5 | 6.5 | 7.0 | 7.0 | 7.0 | 6.0 | 5.333333 | 6.75 | 7.647112 | 19.73044 | 6.576815 |
| 2 | west | male | 14 | 4.571429 | 4.285714 | 4.666667 | 5.00 | 5.500000 | 4.80 | 4.5 | 4.5 | 5.5 | 5.0 | 6.0 | 4.5 | 4.333333 | 5.00 | 4.469623 | 13.80296 | 4.600985 |
| 10 | west | female | 14 | 4.142857 | 6.142857 | 5.333333 | 5.20 | 4.666667 | 6.00 | 4.0 | 4.5 | 3.5 | 7.0 | 7.0 | 6.5 | 6.333333 | 5.50 | 4.710020 | 16.54335 | 5.514451 |
| 11 | west | female | 14 | 5.000000 | 5.428571 | 4.833333 | 6.40 | 5.833333 | 6.40 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 4.333333 | 6.50 | 5.636198 | 16.46953 | 5.489844 |
| 12 | west | female | 14 | 5.166667 | 5.600000 | 4.800000 | 5.25 | 5.400000 | 5.25 | 7.0 | 7.0 | 7.0 | 6.5 | 6.5 | 7.0 | 5.666667 | 6.00 | 5.266592 | 16.93326 | 5.644419 |
| 14 | west | male | 14 | 4.857143 | 4.857143 | 4.166667 | 5.20 | 5.000000 | 4.20 | 5.5 | 6.5 | 7.0 | 6.5 | 6.5 | 6.5 | 5.000000 | 5.50 | 5.913709 | 16.41371 | 5.471236 |
#| code-fold: true
#| code-summary: "Show the code"
# Column-wise skim (unique values, missingness, moments, histogram) of
# the survey data, with colour-coded styling of the variable-name column
# to distinguish the variable groups.
datasummary_skim(df)|>
# Outcome items ls_p1-ls_p3 (rows 15-17): blue.
style_tt(
i = 15:17,
j = 1:1,
background = "#20AACC",
color = "white",
italic = TRUE) |>
# Derived composites ls_sum / ls_mean (rows 18-19): darker blue.
style_tt(
i = 18:19,
j = 1:1,
background = "#2888A0",
color = "white",
italic = TRUE) |>
# Driver measurements (rows 2-14): teal.
style_tt(
i = 2:14,
j = 1:1,
background = "#17C2AD",
color = "white",
italic = TRUE)| Unique | Missing Pct. | Mean | SD | Min | Median | Max | Histogram | |
|---|---|---|---|---|---|---|---|---|
| ID | 283 | 0 | 187.9 | 106.3 | 1.0 | 201.0 | 367.0 | |
| age | 5 | 0 | 14.7 | 0.8 | 13.0 | 15.0 | 17.0 | |
| se_acad_p1 | 32 | 0 | 5.2 | 0.8 | 3.1 | 5.1 | 7.0 | |
| se_acad_p2 | 36 | 0 | 5.3 | 0.7 | 3.1 | 5.4 | 7.0 | |
| se_acad_p3 | 29 | 0 | 5.2 | 0.8 | 2.8 | 5.2 | 7.0 | |
| se_social_p1 | 24 | 0 | 5.3 | 0.8 | 1.8 | 5.4 | 7.0 | |
| se_social_p2 | 27 | 0 | 5.5 | 0.7 | 2.7 | 5.5 | 7.0 | |
| se_social_p3 | 31 | 0 | 5.4 | 0.8 | 3.0 | 5.5 | 7.0 | |
| sup_friends_p1 | 13 | 0 | 5.8 | 1.1 | 1.0 | 6.0 | 7.0 | |
| sup_friends_p2 | 10 | 0 | 6.0 | 0.9 | 2.5 | 6.0 | 7.0 | |
| sup_friends_p3 | 13 | 0 | 6.0 | 1.1 | 1.0 | 6.0 | 7.0 | |
| sup_parents_p1 | 11 | 0 | 6.0 | 1.1 | 1.0 | 6.0 | 7.0 | |
| sup_parents_p2 | 11 | 0 | 5.9 | 1.1 | 2.0 | 6.0 | 7.0 | |
| sup_parents_p3 | 13 | 0 | 5.7 | 1.1 | 1.0 | 6.0 | 7.0 | |
| ls_p1 | 15 | 0 | 5.2 | 0.9 | 2.0 | 5.3 | 7.0 | |
| ls_p2 | 21 | 0 | 5.8 | 0.7 | 2.5 | 5.8 | 7.0 | |
| ls_p3 | 161 | 0 | 5.5 | 1.1 | 1.7 | 5.6 | 9.6 | |
| ls_sum | 218 | 0 | 16.5 | 2.2 | 8.2 | 16.7 | 21.0 | |
| ls_mean | 217 | 0 | 5.5 | 0.7 | 2.7 | 5.6 | 7.0 | |
| N | % | |||||||
| region | east | 142 | 50.2 | |||||
| west | 141 | 49.8 | ||||||
| gender | female | 132 | 46.6 | |||||
| male | 151 | 53.4 |
# The fifteen observed indicators: three parcels for each of the four
# driver constructs plus the three life-satisfaction items, in the same
# stem-major order used throughout the article.
driver_stems <- c('se_acad_p', 'se_social_p', 'sup_friends_p', 'sup_parents_p', 'ls_p')
drivers <- paste0(rep(driver_stems, each = 3), rep(1:3, times = length(driver_stems)))
# Draw an annotated heatmap of a square matrix (covariance/correlation).
# `df` is coerced to a matrix and melted to long form; values are rounded
# to 2 dp for both the fill scale and the cell labels.
plot_heatmap <- function(df, title="Sample Covariances", subtitle="Observed Measures") {
  long <- melt(as.matrix(df))
  colnames(long) <- c("x", "y", "value")
  long$value <- round(long$value, 2)
  # Diverging palette: red negative, white near zero, blue positive.
  ggplot(long, aes(x = x, y = y, fill = value)) +
    geom_tile() +
    geom_text(aes(label = value), color = "black", size = 4) +
    scale_fill_gradient2(high = 'dodgerblue4', mid = 'white', low = 'firebrick2') +
    theme(axis.text.x = element_text(angle = 45)) +
    ggtitle(title, subtitle)
}
# Heatmaps of the raw sample covariance and correlation matrices of the
# fifteen observed indicators, stacked vertically.
g1 = plot_heatmap(cov(df[, drivers]))
g2 = plot_heatmap(cor(df[, drivers]), "Sample Correlations")
plot <- ggarrange(g1,g2, ncol=1, nrow=2);Fit Initial Regression Models
# Regression specifications over the raw parcel indicators: the first
# parcel of each driver only, the first two parcels, and all three
# parcels — for both the sum-score and mean-score outcomes.
formula_sum_1st <- " ls_sum ~ se_acad_p1 + se_social_p1 + sup_friends_p1 + sup_parents_p1"
formula_mean_1st <- " ls_mean ~ se_acad_p1 + se_social_p1 + sup_friends_p1 + sup_parents_p1"
formula_sum_12 <- " ls_sum ~ se_acad_p1 + se_acad_p2 + se_social_p1 + se_social_p2 +
sup_friends_p1 + sup_friends_p2 + sup_parents_p1 + sup_parents_p2"
formula_mean_12 <- " ls_mean ~ se_acad_p1 + se_acad_p2 + se_social_p1 + se_social_p2 +
sup_friends_p1 + sup_friends_p2 + sup_parents_p1 + sup_parents_p2"
formula_sum <- " ls_sum ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 + se_social_p2 + se_social_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"
formula_mean <- " ls_mean ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 + se_social_p2 + se_social_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"

# Fit each specification by OLS, grouped by outcome.
mod_sum_1st <- lm(formula_sum_1st, df)
mod_sum_12 <- lm(formula_sum_12, df)
mod_sum <- lm(formula_sum, df)
mod_mean_1st <- lm(formula_mean_1st, df)
mod_mean_12 <- lm(formula_mean_12, df)
mod_mean <- lm(formula_mean, df)
#' Rescale a numeric vector linearly onto the unit interval [0, 1].
#'
#' @param x A numeric vector.
#' @param na.rm Drop NA values when computing the range? Defaults to
#'   FALSE, matching the original behaviour in which any NA makes the
#'   whole result NA; pass TRUE to scale around the observed values.
#' @return A numeric vector the same length as `x`. Note: a constant
#'   input yields NaN (zero range) — callers should screen such columns.
min_max_norm <- function(x, na.rm = FALSE) {
  rng <- range(x, na.rm = na.rm)  # c(min, max) in one pass
  (x - rng[1]) / (rng[2] - rng[1])
}
# Min-max normalise the fifteen indicator columns (positions 5:19) so
# coefficients are comparable on a common [0, 1] predictor scale, while
# the outcome scores keep their original scale.
df_norm <- as.data.frame(lapply(df[5:19], min_max_norm))
df_norm$ls_sum <- df$ls_sum
df_norm$ls_mean <- df$ls_mean

# Refit the full specifications on the normalised predictors.
mod_sum_norm <- lm(formula_sum, df_norm)
mod_mean_norm <- lm(formula_mean, df_norm)
# Nest the fitted models by outcome so modelsummary (shape = "cbind")
# renders the sum-score models side by side with the mean-score models.
models = list(
"Outcome: sum_score" = list("model_sum_1st_factors" = mod_sum_1st,
"model_sum_1st_2nd_factors" = mod_sum_12,
"model_sum_score" = mod_sum,
"model_sum_score_norm" = mod_sum_norm
),
"Outcome: mean_score" = list(
"model_mean_1st_factors" = mod_mean_1st,
"model_mean_1st_2nd_factors" = mod_mean_12,
"model_mean_score"= mod_mean,
"model_mean_score_norm" = mod_mean_norm
)
)#| code-fold: true
#| code-summary: "Show the code"
# Grouped coefficient table with significance stars; the term-name column
# (rows 2-25) is tinted teal to match the skim table styling above.
modelsummary(models, stars=TRUE, shape ="cbind") |>
style_tt(
i = 2:25,
j = 1:1,
background = "#17C2AD",
color = "white",
italic = TRUE)| Outcome: sum_score | Outcome: mean_score | |||||||
|---|---|---|---|---|---|---|---|---|
| model_sum_1st_factors | model_sum_1st_2nd_factors | model_sum_score | model_sum_score_norm | model_mean_1st_factors | model_mean_1st_2nd_factors | model_mean_score | model_mean_score_norm | |
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | ||||||||
| (Intercept) | 6.335*** | 4.038*** | 3.709*** | 9.081*** | 2.112*** | 1.346*** | 1.236*** | 3.027*** |
| (0.979) | (1.080) | (1.071) | (0.739) | (0.326) | (0.360) | (0.357) | (0.246) | |
| se_acad_p1 | 0.231 | -0.023 | -0.068 | -0.264 | 0.077 | -0.008 | -0.023 | -0.088 |
| (0.158) | (0.197) | (0.216) | (0.833) | (0.053) | (0.066) | (0.072) | (0.278) | |
| se_social_p1 | 1.039*** | 0.515* | 0.401+ | 2.085+ | 0.346*** | 0.172* | 0.134+ | 0.695+ |
| (0.175) | (0.224) | (0.224) | (1.167) | (0.058) | (0.075) | (0.075) | (0.389) | |
| sup_friends_p1 | 0.069 | -0.263+ | -0.273 | -1.636 | 0.023 | -0.088+ | -0.091 | -0.545 |
| (0.095) | (0.145) | (0.169) | (1.011) | (0.032) | (0.048) | (0.056) | (0.337) | |
| sup_parents_p1 | 0.518*** | 0.196 | 0.050 | 0.299 | 0.173*** | 0.065 | 0.017 | 0.100 |
| (0.107) | (0.155) | (0.161) | (0.964) | (0.036) | (0.052) | (0.054) | (0.321) | |
| se_acad_p2 | 0.415+ | 0.382+ | 1.475+ | 0.138+ | 0.127+ | 0.492+ | ||
| (0.216) | (0.227) | (0.875) | (0.072) | (0.076) | (0.292) | |||
| se_social_p2 | 0.662** | 0.483+ | 2.093+ | 0.221** | 0.161+ | 0.698+ | ||
| (0.233) | (0.246) | (1.065) | (0.078) | (0.082) | (0.355) | |||
| sup_friends_p2 | 0.358* | 0.366* | 1.647* | 0.119* | 0.122* | 0.549* | ||
| (0.172) | (0.180) | (0.808) | (0.057) | (0.060) | (0.269) | |||
| sup_parents_p2 | 0.375* | 0.166 | 0.829 | 0.125* | 0.055 | 0.276 | ||
| (0.151) | (0.162) | (0.808) | (0.050) | (0.054) | (0.269) | |||
| se_acad_p3 | -0.072 | -0.302 | -0.024 | -0.101 | ||||
| (0.196) | (0.815) | (0.065) | (0.272) | |||||
| se_social_p3 | 0.350+ | 1.400+ | 0.117+ | 0.467+ | ||||
| (0.180) | (0.721) | (0.060) | (0.240) | |||||
| sup_friends_p3 | 0.056 | 0.334 | 0.019 | 0.111 | ||||
| (0.146) | (0.878) | (0.049) | (0.293) | |||||
| sup_parents_p3 | 0.452** | 2.710** | 0.151** | 0.903** | ||||
| (0.141) | (0.847) | (0.047) | (0.282) | |||||
| Num.Obs. | 283 | 283 | 283 | 283 | 283 | 283 | 283 | 283 |
| R2 | 0.332 | 0.389 | 0.420 | 0.420 | 0.332 | 0.389 | 0.420 | 0.420 |
| R2 Adj. | 0.323 | 0.371 | 0.394 | 0.394 | 0.323 | 0.371 | 0.394 | 0.394 |
| AIC | 1134.2 | 1117.0 | 1110.3 | 1110.3 | 512.4 | 495.2 | 488.5 | 488.5 |
| BIC | 1156.1 | 1153.5 | 1161.3 | 1161.3 | 534.2 | 531.7 | 539.5 | 539.5 |
| Log.Lik. | -561.090 | -548.507 | -541.139 | -541.139 | -250.183 | -237.600 | -230.232 | -230.232 |
| RMSE | 1.76 | 1.68 | 1.64 | 1.64 | 0.59 | 0.56 | 0.55 | 0.55 |
# Flat (ungrouped) list of all eight fitted regressions, keyed by
# descriptive name, for a single coefficient-comparison plot.
models <- setNames(
  list(mod_sum_1st, mod_sum_12, mod_sum, mod_sum_norm,
       mod_mean_1st, mod_mean_12, mod_mean, mod_mean_norm),
  c("model_sum_1st_factors", "model_sum_1st_2nd_factors",
    "model_sum_score", "model_sum_score_norm",
    "model_mean_1st_factors", "model_mean_1st_2nd_factors",
    "model_mean_score", "model_mean_score_norm")
)
# Dot-and-whisker plot of all model coefficients (intercepts omitted)
# with a dotted reference line at zero.
modelplot(models, coef_omit = 'Intercept') + geom_vline(xintercept = 0, linetype="dotted",
color = "black") + ggtitle("Comparing Model Parameter Estimates", "Across Covariates")Significant Coefficients
# Re-plot the full sum- and mean-score models, greying out coefficients
# whose p-value is not below 0.001.
g1 = modelplot(mod_sum, coef_omit = 'Intercept') + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"), guide = "none")
g2 = modelplot(mod_mean, coef_omit = 'Intercept') + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"))
plot <- ggarrange(g1,g2, ncol=2, nrow=1);Aggregate Driver Scores
# Parcel (mean) scores: average the three parcels of each driver
# construct into a single aggregate column.
construct_items <- list(
  se_acad_mean = c('se_acad_p1', 'se_acad_p2', 'se_acad_p3'),
  se_social_mean = c('se_social_p1', 'se_social_p2', 'se_social_p3'),
  sup_friends_mean = c('sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3'),
  sup_parents_mean = c('sup_parents_p1', 'sup_parents_p2', 'sup_parents_p3')
)
for (nm in names(construct_items)) {
  df[[nm]] <- rowMeans(df[construct_items[[nm]]])
}

# Regress each composite outcome on the four parcel means.
formula_parcel_mean <- "ls_mean ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"
formula_parcel_sum <- "ls_sum ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"
mod_sum_parcel <- lm(formula_parcel_sum, df)
mod_mean_parcel <- lm(formula_parcel_mean, df)
models_parcel <- list("model_sum_score" = mod_sum_parcel,
                      "model_mean_score" = mod_mean_parcel)
modelsummary(models_parcel, stars=TRUE)| model_sum_score | model_mean_score | |
|---|---|---|
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | ||
| (Intercept) | 4.378*** | 1.459*** |
| (1.036) | (0.345) | |
| se_acad_mean | 0.252 | 0.084 |
| (0.176) | (0.059) | |
| se_social_mean | 1.205*** | 0.402*** |
| (0.195) | (0.065) | |
| sup_friends_mean | 0.049 | 0.016 |
| (0.108) | (0.036) | |
| sup_parents_mean | 0.685*** | 0.228*** |
| (0.111) | (0.037) | |
| Num.Obs. | 283 | 283 |
| R2 | 0.397 | 0.397 |
| R2 Adj. | 0.388 | 0.388 |
| AIC | 1105.3 | 483.5 |
| BIC | 1127.1 | 505.3 |
| Log.Lik. | -546.638 | -235.731 |
| RMSE | 1.67 | 0.56 |
# Coefficient plots for the parcel-score models, greying out terms whose
# p-value is not below 0.001.
g1 = modelplot(mod_sum_parcel, coef_omit = 'Intercept') + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"), guide = "none")
g2 = modelplot(mod_mean_parcel, coef_omit = 'Intercept') + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"))
plot <- ggarrange(g1,g2, ncol=2, nrow=1);Hierarchical Models
# Random-intercept specifications: all twelve parcel predictors plus a
# varying intercept by region, for each composite outcome.
formula_hierarchy_mean = "ls_mean ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3 + (1 | region)"
formula_hierarchy_sum = "ls_sum ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3 + (1 | region)"
# Fit by REML with lme4.
hierarchical_mean_fit <- lmer(formula_hierarchy_mean, data = df, REML = TRUE)
hierarchical_sum_fit <- lmer(formula_hierarchy_sum, data = df, REML = TRUE)
# Fixed-effects coefficient plots (re.form=NA excludes random effects),
# greying out terms whose p-value is not below 0.001.
g1 = modelplot(hierarchical_mean_fit, re.form=NA) + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"), guide = "none")
g2 = modelplot(hierarchical_sum_fit, re.form=NA) + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"))
plot <- ggarrange(g1,g2, ncol=2, nrow=1);Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
# Tabulate both mixed models side by side with significance stars; the
# term-name column is tinted teal for rows 2-25, matching earlier tables.
modelsummary(list("hierarchical_mean_fit"= hierarchical_mean_fit,
"hierarchical_sum_fit"= hierarchical_sum_fit),
stars = TRUE) |>
style_tt(
i = 2:25,
j = 1:1,
background = "#17C2AD",
color = "white",
italic = TRUE)| hierarchical_mean_fit | hierarchical_sum_fit | |
|---|---|---|
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | ||
| (Intercept) | 0.903* | 2.710* |
| (0.385) | (1.154) | |
| sup_parents_p1 | -0.007 | -0.021 |
| (0.053) | (0.160) | |
| sup_parents_p2 | 0.070 | 0.210 |
| (0.053) | (0.159) | |
| sup_parents_p3 | 0.160*** | 0.480*** |
| (0.046) | (0.139) | |
| sup_friends_p1 | -0.091+ | -0.274+ |
| (0.055) | (0.166) | |
| sup_friends_p2 | 0.125* | 0.376* |
| (0.059) | (0.177) | |
| sup_friends_p3 | 0.024 | 0.072 |
| (0.048) | (0.144) | |
| se_acad_p1 | -0.041 | -0.122 |
| (0.071) | (0.213) | |
| se_acad_p2 | 0.131+ | 0.393+ |
| (0.074) | (0.223) | |
| se_acad_p3 | 0.039 | 0.116 |
| (0.067) | (0.202) | |
| se_social_p1 | 0.094 | 0.283 |
| (0.075) | (0.224) | |
| se_social_p2 | 0.191* | 0.574* |
| (0.081) | (0.244) | |
| se_social_p3 | 0.130* | 0.389* |
| (0.059) | (0.178) | |
| SD (Intercept region) | 0.160 | 0.481 |
| SD (Observations) | 0.549 | 1.648 |
| Num.Obs. | 283 | 283 |
| R2 Marg. | 0.424 | 0.424 |
| R2 Cond. | 0.469 | 0.469 |
| AIC | 538.4 | 1131.6 |
| BIC | 593.1 | 1186.3 |
| ICC | 0.1 | 0.1 |
| RMSE | 0.54 | 1.61 |
Marginal Effects
# Model-implied predictions on counterfactual grids: sup_parents_p3 swept
# over 1:10 for both regions (random effects excluded via re.form=NA) for
# the mixed model, and sup_parents_p2 swept over 1:10 for the OLS model;
# datagrid() holds all other covariates at their representative values.
pred <- predictions(hierarchical_mean_fit, newdata = datagrid(sup_parents_p3 = 1:10, region=c('east', 'west')), re.form=NA)
pred1 <- predictions(mod_mean, newdata = datagrid(sup_parents_p2 = 1:10))
pred1 |> head() |> kableExtra::kable()| rowid | estimate | std.error | statistic | p.value | s.value | conf.low | conf.high | se_acad_p1 | se_acad_p2 | se_acad_p3 | se_social_p1 | se_social_p2 | se_social_p3 | sup_friends_p1 | sup_friends_p2 | sup_friends_p3 | sup_parents_p1 | sup_parents_p3 | sup_parents_p2 | ls_mean |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 5.232276 | 0.2673967 | 19.56747 | 0 | 280.8136 | 4.708188 | 5.756363 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 1 | 6.576815 |
| 2 | 5.287553 | 0.2140733 | 24.69973 | 0 | 445.0319 | 4.867977 | 5.707128 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 2 | 6.576815 |
| 3 | 5.342829 | 0.1610972 | 33.16525 | 0 | 798.8130 | 5.027085 | 5.658574 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 3 | 6.576815 |
| 4 | 5.398106 | 0.1089765 | 49.53461 | 0 | Inf | 5.184516 | 5.611696 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 4 | 6.576815 |
| 5 | 5.453383 | 0.0599835 | 90.91472 | 0 | Inf | 5.335818 | 5.570949 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 5 | 6.576815 |
| 6 | 5.508660 | 0.0334480 | 164.69327 | 0 | Inf | 5.443103 | 5.574217 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 6 | 6.576815 |
Regression Marginal Effects
# Counterfactual prediction curves from the OLS model: sweep one
# predictor at a time with everything else held fixed.
g = plot_predictions(mod_mean, condition = c("sup_parents_p3"), type = "response") + ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")
g1 = plot_predictions(mod_mean, condition = c("sup_friends_p1"), type = "response") + ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")
g2 = plot_predictions(mod_mean, condition = c("se_acad_p1"),
type = "response") + ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")
plot <- ggarrange(g,g1,g2, ncol=1, nrow=3);Hierarchical Marginal Effects
# The same sweeps from the mixed model, additionally split by region
# (fixed effects only via re.form=NA).
g = plot_predictions(hierarchical_mean_fit, condition = c("sup_parents_p3", "region"), type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")
g1 = plot_predictions(hierarchical_mean_fit, condition = c("sup_friends_p1", "region"), type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")
g2 = plot_predictions(hierarchical_mean_fit, condition = c("se_acad_p1", "region"),
type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")
plot <- ggarrange(g,g1,g2, ncol=1, nrow=3);Confirmatory Factor Analysis
# CFA measurement model: five latent factors, each measured by three
# observed parcels (lavaan fixes the first loading of each factor to 1
# to set the latent scale).
model_measurement <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS =~ ls_p1 + ls_p2 + ls_p3
"
# Constrained variant: the SUP_Friends loadings are labelled a1-a3 and
# forced equal, i.e. a tau-equivalent structure for that factor.
model_measurement1 <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ a1*sup_friends_p1 + a2*sup_friends_p2 + a3*sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS =~ ls_p1 + ls_p2 + ls_p3
a1 == a2
a1 == a3
"
# Fit both measurement models to the raw data.
fit_mod <- cfa(model_measurement, data = df)
fit_mod_1<- cfa(model_measurement1, data = df)
# Collect the fits for comparison tables and plots below.
cfa_models = list("full_measurement_model" = fit_mod,
"measurement_model_reduced" = fit_mod_1)
modelplot(cfa_models)summary(fit_mod, fit.measures = TRUE, standardized = TRUE) lavaan 0.6-18 ended normally after 46 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 40
Number of observations 283
Model Test User Model:
Test statistic 207.137
Degrees of freedom 80
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 2586.651
Degrees of freedom 105
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.949
Tucker-Lewis Index (TLI) 0.933
Loglikelihood and Information Criteria:
Loglikelihood user model (H0) -4394.212
Loglikelihood unrestricted model (H1) -4290.643
Akaike (AIC) 8868.423
Bayesian (BIC) 9014.241
Sample-size adjusted Bayesian (SABIC) 8887.401
Root Mean Square Error of Approximation:
RMSEA 0.075
90 Percent confidence interval - lower 0.062
90 Percent confidence interval - upper 0.088
P-value H_0: RMSEA <= 0.050 0.001
P-value H_0: RMSEA >= 0.080 0.263
Standardized Root Mean Square Residual:
SRMR 0.056
Parameter Estimates:
Standard errors Standard
Information Expected
Information saturated (h1) model Structured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
SUP_Parents =~
sup_parents_p1 1.000 0.938 0.876
sup_parents_p2 1.034 0.056 18.570 0.000 0.970 0.888
sup_parents_p3 0.988 0.059 16.637 0.000 0.927 0.811
SUP_Friends =~
sup_friends_p1 1.000 1.021 0.906
sup_friends_p2 0.793 0.043 18.466 0.000 0.809 0.857
sup_friends_p3 0.891 0.050 17.735 0.000 0.910 0.831
SE_Academic =~
se_acad_p1 1.000 0.697 0.880
se_acad_p2 0.806 0.049 16.278 0.000 0.561 0.819
se_acad_p3 0.952 0.058 16.491 0.000 0.663 0.828
SE_Social =~
se_social_p1 1.000 0.640 0.846
se_social_p2 0.959 0.055 17.338 0.000 0.614 0.881
se_social_p3 0.926 0.066 13.956 0.000 0.593 0.742
LS =~
ls_p1 1.000 0.677 0.729
ls_p2 0.820 0.078 10.488 0.000 0.555 0.762
ls_p3 0.744 0.109 6.809 0.000 0.504 0.459
Covariances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
SUP_Parents ~~
SUP_Friends 0.134 0.064 2.094 0.036 0.140 0.140
SE_Academic 0.219 0.046 4.717 0.000 0.335 0.335
SE_Social 0.284 0.046 6.239 0.000 0.473 0.473
LS 0.360 0.056 6.455 0.000 0.567 0.567
SUP_Friends ~~
SE_Academic 0.071 0.047 1.493 0.135 0.100 0.100
SE_Social 0.195 0.046 4.262 0.000 0.299 0.299
LS 0.184 0.053 3.500 0.000 0.267 0.267
SE_Academic ~~
SE_Social 0.274 0.036 7.525 0.000 0.613 0.613
LS 0.212 0.039 5.407 0.000 0.449 0.449
SE_Social ~~
LS 0.330 0.043 7.650 0.000 0.761 0.761
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.sup_parents_p1 0.268 0.037 7.143 0.000 0.268 0.233
.sup_parents_p2 0.253 0.038 6.595 0.000 0.253 0.212
.sup_parents_p3 0.446 0.048 9.262 0.000 0.446 0.342
.sup_friends_p1 0.228 0.040 5.663 0.000 0.228 0.179
.sup_friends_p2 0.237 0.030 7.926 0.000 0.237 0.266
.sup_friends_p3 0.371 0.042 8.811 0.000 0.371 0.310
.se_acad_p1 0.141 0.022 6.487 0.000 0.141 0.226
.se_acad_p2 0.154 0.018 8.648 0.000 0.154 0.329
.se_acad_p3 0.202 0.024 8.397 0.000 0.202 0.315
.se_social_p1 0.163 0.020 8.095 0.000 0.163 0.284
.se_social_p2 0.109 0.016 6.782 0.000 0.109 0.224
.se_social_p3 0.287 0.028 10.141 0.000 0.287 0.450
.ls_p1 0.404 0.048 8.422 0.000 0.404 0.469
.ls_p2 0.223 0.029 7.624 0.000 0.223 0.420
.ls_p3 0.951 0.085 11.123 0.000 0.951 0.789
SUP_Parents 0.880 0.098 8.937 0.000 1.000 1.000
SUP_Friends 1.042 0.111 9.405 0.000 1.000 1.000
SE_Academic 0.485 0.054 8.908 0.000 1.000 1.000
SE_Social 0.410 0.048 8.459 0.000 1.000 1.000
LS 0.458 0.072 6.323 0.000 1.000 1.000
# Compare the empirical covariance heatmap against the model-implied
# covariances recovered from the fitted CFA.
g1 = plot_heatmap(cov(df[, drivers]))
g2 = plot_heatmap(data.frame(fitted(fit_mod)$cov), title="Model Implied Covariances", "Fitted Values")
plot <- ggarrange(g1,g2, ncol=1, nrow=2);Modification Indices
modindices(fit_mod, sort = TRUE, maximum.number = 6, standardized = FALSE, minimum.value = 5) lhs op rhs mi epc
152 sup_friends_p1 ~~ se_social_p3 19.245 0.095
68 SUP_Friends =~ ls_p2 17.491 0.181
64 SUP_Friends =~ se_social_p1 17.210 -0.136
60 SUP_Friends =~ sup_parents_p3 16.063 -0.187
210 ls_p2 ~~ ls_p3 13.931 -0.140
57 SUP_Parents =~ ls_p3 13.057 0.329
Summary Global Fit Measures
# Side-by-side global fit measures for the two measurement models; the
# requested statistics are hoisted into one vector to avoid repetition.
fit_stats <- c("chisq", "baseline.chisq", "cfi", "aic", "bic", "rmsea", "srmr")
summary_df <- cbind(fitMeasures(fit_mod, fit_stats),
                    fitMeasures(fit_mod_1, fit_stats))
colnames(summary_df) <- c('Full Model', 'Reduced Model')
summary_df Full Model Reduced Model
chisq 207.13746909 225.18095274
baseline.chisq 2586.65112811 2586.65112811
cfi 0.94876900 0.94230416
aic 8868.42313715 8882.46662079
bic 9014.24101305 9020.99360290
rmsea 0.07493739 0.07854933
srmr 0.05595908 0.05904842
# Path diagrams: raw estimates for the full model, standardized loadings
# for the constrained model.
semPlot::semPaths(fit_mod, whatLabels = 'est', intercepts = FALSE)semPlot::semPaths(fit_mod_1, whatLabels = 'std', intercepts = FALSE)Comparing the empirical and implied variance-covariance matrix
# Standardized residuals of (sample - model-implied) covariances; large
# cells flag local misfit. The original chunk duplicated the melt/ggplot
# pipeline twice verbatim — reuse the plot_heatmap() helper defined
# earlier in this file instead, which performs the identical reshaping,
# rounding, tiling, and diverging fill.
g1 <- plot_heatmap(
  data.frame(resid(fit_mod, type = "standardized")$cov),
  "Residuals of the Sample Covariances and Model Implied Covariances",
  "A Visual Check of Model fit: Full Model"
)
g2 <- plot_heatmap(
  data.frame(resid(fit_mod_1, type = "standardized")$cov),
  "Residuals of the Sample Covariances and Model Implied Covariances",
  "A Visual Check of Model fit: Reduced Model"
)
plot <- ggarrange(g1,g2, ncol=1, nrow=2);anova(fit_mod)Chi-Squared Test Statistic (unscaled)
Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
Saturated 0 0.00
Model 80 8868.4 9014.2 207.14 207.14 80 0.0000000000003209 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit_mod_1)Chi-Squared Test Statistic (unscaled)
Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
Saturated 0 0.00
Model 82 8882.5 9021 225.18 225.18 82 0.000000000000002776 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit_mod, fit_mod_1)
Chi-Squared Difference Test
Df AIC BIC Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
fit_mod 80 8868.4 9014.2 207.14
fit_mod_1 82 8882.5 9021.0 225.18 18.044 0.16836 2 0.0001208 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Structural Equation Models
# Full SEM: the five-factor measurement model plus a structural
# regression of life satisfaction (LS) on the four driver factors, with
# an explicit residual covariance between the two self-efficacy factors.
model <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS =~ ls_p1 + ls_p2 + ls_p3
# Structural model
# Regressions
LS ~ SE_Academic + SE_Social + SUP_Parents + SUP_Friends
# Residual covariances
SE_Academic ~~ SE_Social
"
# Fit the structural model, draw its path diagram, and heatmap the
# standardized residual covariances as a visual check of fit.
fit_mod_sem <- sem(model, data = df)modelplot(fit_mod_sem)semPlot::semPaths(fit_mod_sem, whatLabels = 'std', intercepts = FALSE)heat_df = data.frame(resid(fit_mod_sem, type = "standardized")$cov)
heat_df = heat_df |> as.matrix() |> melt()
colnames(heat_df) <- c("x", "y", "value")
# Same diverging heatmap styling used for the CFA residuals above.
heat_df |> mutate(value = round(value, 2)) |> ggplot(aes(x = x, y = y, fill = value)) +
geom_tile() + geom_text(aes(label = value), color = "black", size = 4) +
scale_fill_gradient2(
high = 'dodgerblue4',
mid = 'white',
low = 'firebrick2'
) + theme(axis.text.x = element_text(angle=45)) + ggtitle("Residuals of the Sample Covariances and Model Implied Covariances", "A Visual Check of Model fit")```
Citation
BibTeX citation:
@online{forde,
author = {Nathaniel Forde},
title = {Confirmatory {Factor} {Analysis} and {Structural} {Equation}
{Models}},
langid = {en}
}
For attribution, please cite this work as:
Nathaniel Forde. n.d. “Confirmatory Factor Analysis and Structural
Equation Models.”